// GLSL-style type names mapped onto the OpenCL C float vector types, so
// shader-like code (vec2/vec3/vec4) compiles unchanged as OpenCL.
#define vec2 float2
#define vec3 float3
#define vec4 float4
// GLSL-style colour swizzles mapped onto the positional swizzles.
// NOTE: these are plain token replacements, so ANY identifier spelled `rgb`
// or `rgba` in code that follows is rewritten to `xyz` / `xyzw`.
#define rgb xyz
#define rgba xyzw
// Scalar maximum / minimum of two expressions.
// The whole expansion is parenthesized so the macros compose safely inside
// larger expressions (e.g. `2 * _max(a, b)` or `_min(a, b) + c`).
// NOTE: each argument is evaluated twice; do not pass expressions with side
// effects such as `i++`.
#define _max(a,b) (((a)>(b))?(a):(b))
#define _min(a,b) (((a)<(b))?(a):(b))
// Component-wise absolute value of a 4-component vector, built as a float4.
// Each component is kept when strictly greater than 0.0f and negated
// otherwise, so a +0.0f component comes out as -0.0f.
// NOTE: the argument is evaluated several times; pass only side-effect-free
// expressions.
#define _abs(a)	(float4)( (a).x>0.0f?(a).x:-(a).x, (a).y>0.0f?(a).y:-(a).y, (a).z>0.0f?(a).z:-(a).z, (a).w>0.0f?(a).w:-(a).w)
// Program-scope sampler shared by every image read in this file:
// normalized coordinates, out-of-range coordinates clamped to the
// edge texel, and linear filtering between texels.
const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;

// Samples the source image at `tc` after remapping it through the region of
// interest stored in `param->origROI`:
//   origROI[0], origROI[1] -> offset added to the coordinate
//   origROI[2], origROI[3] -> per-axis scale applied to the coordinate
// The remapped coordinate is read through the file-level `sampler`.
// Returns the sampled texel as a 4-component float vector.
vec4 INPUT1(image2d_t src_data,  __global FilterParam* param, vec2 tc)
{
	const vec2 roiScale = (vec2)(param->origROI[2], param->origROI[3]);
	const vec2 roiOffset = (vec2)(param->origROI[0], param->origROI[1]);
	const vec2 roiCoord = tc * roiScale + roiOffset;
	return read_imagef(src_data, sampler, roiCoord);
}

// Samples the overlay image at `tc` through the file-level `sampler`.
// Unlike INPUT1, the coordinate is used as given (no ROI remapping).
// Returns the sampled texel as a 4-component float vector.
vec4 INPUT2(image2d_t ovelay1, vec2 tc)
{
	const vec4 texel = read_imagef(ovelay1, sampler, tc);
	return texel;
}

// Maps a value from the window [1, 2] onto [0, 1] with saturation:
//   x <  1        -> 0
//   1 <= x <  2   -> x - 1
//   otherwise     -> 1   (this also covers NaN, since both comparisons fail)
float clip_byte(float x)
{
	const float shifted = x - 1.0f;
	return (x < 1.0f) ? 0.0f
	                  : ((x < 2.0f) ? shifted : 1.0f);
}

// Single-precision approximation of pi (7 decimal places).
#define FILTER_PI 3.1415926f

// Emboss-style filter kernel.
//
// For every work-item the kernel compares the source pixel with a neighbour
// sampled `aDepth` pixels to its right, turns the brightness difference into a
// gray level centred on 0.5, and blends that gray with the original colour.
//
//   src_data  - source image, sampled through INPUT1 (ROI-remapped).
//   dest_data - destination image, written at (get_global_id(0), get_global_id(1)).
//   param     - filter parameters; used here for the ROI remap in INPUT1 and
//               passed to get_global_id0 / get_global_id1.
//   aPercent  - effect strength, documented by the caller as 0-100; controls the
//               horizontal distance to the comparison pixel.
//   alpha     - blend amount, documented as 0-100: 0 keeps the source colour,
//               100 outputs the embossed gray only. The value is not clamped here.
//
// The global work size defines the image size used for normalization, so the
// kernel is expected to be launched with one work-item per output pixel.
__kernel void MAIN(
      __read_only image2d_t src_data,
      __write_only image2d_t dest_data,        //Data in global memory
	   __global FilterParam* param,
	  float aPercent, //0-100
	  int alpha)  //[0-100]		// the gpu items/threads should be newW*newH
{
	const int W = get_global_size(0);
	const int H = get_global_size(1);

	// Integer pixel position used for the write.
	const int2 coordinate = (int2)(get_global_id(0), get_global_id(1));
	// Position used for sampling. get_global_id0/1 are helpers defined outside
	// this view; presumably they return the (possibly ROI-adjusted) pixel
	// position for this work-item - TODO confirm against their definition.
	const vec2 gl_FragCoord = (vec2)(get_global_id0( param), get_global_id1( param));
	const vec2 tc = gl_FragCoord / (float2)(W,H);

	// Horizontal offset, in pixels, to the comparison sample. It grows with
	// aPercent and scales with the width relative to a 640-pixel reference;
	// the float result is truncated to an int.
	const int aDepth = ( aPercent/ 12 + 1) * W/640.0f ;

	const float4 colA = INPUT1(src_data, param, tc );

	// Alpha of the result always comes from the source pixel.
	float4 outColor;
	outColor.w = colA.w;

	if(gl_FragCoord.x < W - aDepth)
	{
		// Neighbour aDepth pixels to the right, in normalized coordinates.
		const float4 colB = INPUT1(src_data, param, (float2)( tc.x + (float)aDepth/W, tc.y) );

		// Brightness sums with green counted twice (x + 2*y + z); dividing by 4
		// below gives a weighted average - presumably a cheap luma approximation.
		const float sumA = colA.z + colA.y + colA.x + colA.y;
		const float sumB = colB.z + colB.y + colB.x + colB.y;
		const float diff = sumA - sumB;

		// diff/4 lies in [-1, 1] for inputs in [0, 1]; adding 1.5 moves it into
		// clip_byte's [1, 2] window, so the net effect is clamp(diff/4 + 0.5, 0, 1).
		const float gray = clip_byte(diff/4.0f + 1.5f);
		outColor.xyz = (float3)(gray);
	}
	else
	{
		// No neighbour inside the image on the right border strip: use the
		// neutral mid-gray that a zero difference would produce.
		outColor.xyz = (float3)(0.5f);
	}

	// Linear blend between the source colour and the embossed colour.
	const float alphaF = (float)alpha;
	write_imagef(dest_data, coordinate, colA*(1.0f - alphaF/100.0f) + outColor*alphaF/100.0f);
}

